/*
 * Code for sending, receiving, and responding to probes
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <netinet/in.h>

#include "libfma.h"
#include "lf_fabric.h"
#include "lf_scheduler.h"
#include "lf_fma_comm.h"
#include "lf_xbar32.h"
#include "lf_myri_packet.h"
#include "lf_topo_map.h"
#include "lf_fma_flags.h"
#include "lf_lag.h"

#include "libmyri.h"

#include "fma.h"
#include "fma_fabric.h"
#include "fma_myri.h"
#include "fma_map.h"
#include "fma_probe.h"
#include "fma_verify.h"
#include "fma_settings.h"
#include "fma_sync_timer.h"

/*
 * Local prototypes
 */
static void fma_initiate_verify_probe(void *vpdp);
static void fma_got_link_verify_resp(struct fma_probe_desc *pdp,
                                    void *pkt, int len);
static void fma_link_verify_failed(struct fma_probe_desc *pdp);
static int fma_find_verify_route(struct fma_probe_desc *pdp);
static void fma_define_one_empty_port_probe(union lf_node *np,
  int port, int tagged);
static void fma_define_empty_nic_probe(union lf_node *np, int port);
static int fma_find_empty_nic_route(struct fma_probe_desc *pdp, struct lf_xbar *xp, int port);
static int fma_find_empty_port_route( struct fma_probe_desc *pdp,
  struct lf_xbar *xp, int port, int tagged);

static void fma_empty_port_resp(struct fma_probe_desc *pdp,
  void *pkt, int len);
static void fma_empty_port_failed(struct fma_probe_desc *pdp);
static void fma_nic_probe_failed(struct fma_probe_desc *pdp);
static void fma_nic_probe_resp(struct fma_probe_desc *pdp,void *vpkt,int len);
static int fma_find_subfab_empty_ports(struct lf_fabric *fp,
  struct fma_nic_info *nip, int nic_port, struct lf_xbar **subfab_xbars,
  int *subfab_xbar_ports);
static void fma_create_disconnected_nic_probes(void);


void dumpit(struct lf_fabric *);

/*
 * Tear down every probe on the verify list.  Each descriptor is taken
 * off both the verify (user) list and the main probe list, has its
 * pending timers cancelled, and is then freed.  The list anchor itself
 * is not touched.
 */
void
fma_clear_verify_probes()
{
  struct fma_probe_desc *head;
  struct fma_probe_desc *cur;
  struct fma_probe_desc *following;

  if (A.debug) fma_log("clearing verify probes");

  head = A.myri->verify_anchor;

  /* "following" is fetched before "cur" is freed so the walk stays valid */
  for (cur = head->user_next; cur != head; cur = following) {
    following = cur->user_next;

    /* detach from the verify list, then from the main probe list */
    cur->user_prev->user_next = following;
    following->user_prev = cur->user_prev;
    fma_unlink_probe_descriptor(cur);

    /* stop the re-probe timer, if one is running */
    if (cur->pd_timer != NULL) {
      lf_remove_event(cur->pd_timer);
      cur->pd_timer = NULL;
    }

    /* stop the sync timer, if one is running */
    if (cur->pd_sync_timer != NULL) {
      fma_sync_timer_cancel(cur->pd_sync_timer);
      cur->pd_sync_timer = NULL;
    }

    /* release the descriptor */
    LF_FREE(cur);
  }
}

/*
 * Create a persistent link-verify probe for one port of an xbar, put it
 * on the verify list, and schedule its first transmission.
 *
 *   xp   - xbar owning the link to be verified
 *   port - port on xp where the link attaches
 *
 * Returns 0 once the probe is defined, or -1 when
 * fma_find_verify_route() reports failure (the descriptor is freed in
 * that case).  The "except" path calls fma_perror_exit(1).
 */
int
fma_define_link_verify_probe(
  struct lf_xbar *xp,
  int port)
{
  struct fma_probe_desc *probe;
  struct fma_probe_desc *head;
  struct fma_settings *settings;
  int route_rc;
  int delay;

  settings = A.settings;
  head = A.myri->verify_anchor;

  /* get a descriptor and describe the probe in it */
  LF_CALLOC(probe, struct fma_probe_desc, 1);

  probe->pd_type = FMA_PT_LINK_VERIFY;
  probe->pd_timeout = settings->verify_timeout;
  probe->persistant = TRUE;
  probe->resp_rtn = fma_got_link_verify_resp;
  probe->fail_rtn = fma_link_verify_failed;
  probe->u.link_verify.lv_xbar = xp;
  probe->u.link_verify.lv_xbar_port = port;

  /* choose the local NIC, port and route that will exercise this link */
  route_rc = fma_find_verify_route(probe);
  if (route_rc < 0) {

    if (route_rc != -1) {
      /* probe just can't be done; only report it at high debug levels */
      if (A.debug > 1) {
        fma_perror();
      } else {
        fma_reset_error();
      }
    } else {
      /* -1 is taken to mean the map is probably invalid */
      fma_save_error(__FILE__, __LINE__, 0,
          "Error calculating verify route, invalid map?");
      fma_map_is_invalid(TRUE, "Error calculating verify route");
      fma_perror();
    }

    /* this descriptor never made it onto any list */
    LF_FREE(probe);
    return -1;
  }

  /* insert right after the anchor of the verify list */
  probe->user_prev = head;
  probe->user_next = head->user_next;
  head->user_next->user_prev = probe;
  head->user_next = probe;

  /* build the scout packet; the serial number is filled in at send time */
  fma_fill_nic_verify_scout(&probe->packet.nic_scout, probe->origin,
                            probe->pd_port, 0,
                            probe->pd_route, probe->pd_route_len);
  probe->pkt_len = sizeof(probe->packet.nic_scout);

  if (A.debug > 1) {
    fma_log("define verify %s, nic %d, port %d route=%s, reply=%s",
        lf_node_string(LF_NODE(probe->u.link_verify.lv_xbar),
                       probe->u.link_verify.lv_xbar_port),
        probe->origin->nic_id, probe->pd_port,
        lf_route_str(probe->pd_route, probe->pd_route_len),
        lf_route_str(probe->packet.nic_scout.reply_route,
                     probe->packet.nic_scout.reply_route_len_8));
  }

  /* first send happens at a random offset inside one verify interval */
  if (settings->verify_interval > 0) {
    delay = random() % settings->verify_interval;
    probe->pd_timer = lf_schedule_event(fma_initiate_verify_probe, probe,
                                        delay);
    if (probe->pd_timer == NULL) LF_ERROR(("Error scheduling verify probe"));
  }
  return 0;

 except:
  fma_perror_exit(1);
  return -1;
}

/*
 * Start a verify probe cycle on a link
 */
static void
fma_initiate_verify_probe(
  void *vpdp)
{
  struct fma_probe_desc *pdp;
  struct fma_settings *asp;
  struct fma_nic_verify_scout_opaque_data *sodp;

  asp = A.settings;
  pdp = vpdp;

  pdp->pd_timer = NULL;	/* no timer active now */

  /* 
   * If verify interval has been set to zero, then just shut
   * this probe down for now
   */
  if (asp->verify_interval == 0) {
    return;
  }

  /* If this probe is still pending, skip this time around */
  if (pdp->pd_sync_timer != NULL) {

    fma_log("Deferring verify, probe still pending");

    /* start re-probe timer */
    pdp->pd_timer = lf_schedule_event(fma_initiate_verify_probe, pdp,
	asp->verify_interval);
    if (pdp->pd_timer == NULL) LF_ERROR(("Error rescheduling verify probe"));

    return;
  }

  /* fill in serial number and retries */
  pdp->pd_serial = fma_myri_next_probe_serial();
  if (pdp->pd_next != NULL) {
    fma_log("probe descriptor should be unlinked here!");
    abort();
  }

  /* If the link is expected to be down, just try once,
   * else specified number of times
   */
  if (pdp->pd_type == FMA_PT_LINK_VERIFY) {
    sodp = (struct fma_nic_verify_scout_opaque_data *)
      &pdp->packet.nic_scout.opaque_scout_data;
    sodp->serial_32 = htonl(pdp->pd_serial);
    if (pdp->u.link_verify.lv_link_down) {
      pdp->retries_left = 0;
    } else {
      pdp->retries_left = asp->verify_retries;
    }

  /* same for xbar verifies */
  } else if (pdp->pd_type == FMA_PT_XBAR_VERIFY) {
    struct lf_nic *nicp;
    struct lf_xbar *xp;
    int xport;

    pdp->packet.xbar_scout_pkt.serial_32 = htonl(pdp->pd_serial);

    /* get NIC pointer */
    nicp = pdp->origin->nic_ptr;
    xp = LF_XBAR(nicp->topo_ports[pdp->pd_port]);
    xport = nicp->topo_rports[pdp->pd_port];

    /* If the link is expected to be down, ask for a remap to pick it up */
    if (xp == NULL || xp->link_state[xport] != LF_LINK_STATE_UP) {
      pdp->retries_left = 0;
    } else {
      pdp->retries_left = asp->verify_retries;
    }

  /* just try once when checking empty ports for an xbar */
  } else if (pdp->pd_type == FMA_PT_EMPTY_PORT_CHECK_XBAR) {
    pdp->packet.xbar_scout_pkt.serial_32 = htonl(pdp->pd_serial);
    pdp->retries_left = 0;

  /* just try once when checking empty ports for a NIC */
  } else if (pdp->pd_type == FMA_PT_EMPTY_PORT_CHECK_NIC) {
    sodp = (struct fma_nic_verify_scout_opaque_data *)
      &pdp->packet.nic_scout.opaque_scout_data;
    sodp->serial_32 = htonl(pdp->pd_serial);
    pdp->retries_left = 0;

  } else if (pdp->pd_type == FMA_PT_NIC_VERIFY){

    /*keep verifying the nic*/
    sodp = (struct fma_nic_verify_scout_opaque_data *)
      &pdp->packet.nic_scout.opaque_scout_data;
    sodp->serial_32 = htonl(pdp->pd_serial);
    pdp->retries_left = asp->verify_retries; 
  }
  
  /* start the send of the packet */
  fma_send_probe_packet(pdp);			/* send the packet */

  /* put this on probe list to be found */
  fma_link_probe_descriptor(pdp);

  return;

 except:
  fma_perror_exit(1);
}

/*
 * Response handler for a link verify probe.
 *
 * If the link was recorded as down when the probe was defined, a reply
 * means the map no longer matches the fabric, so the map is declared
 * invalid.  In every case the probe is rescheduled one verify interval
 * from now, provided the interval is non-zero.
 *
 *   pdp  - the probe that was answered
 *   vpkt - reply packet (not examined)
 *   len  - reply length (not examined)
 */
static void
fma_got_link_verify_resp(
  struct fma_probe_desc *pdp,
  void *vpkt,
  int len)
{
  struct fma_settings *asp;
  lf_string_t why;

  asp = A.settings;

  /* If the link is expected to be down, ask for a remap to pick it up */
  if (pdp->u.link_verify.lv_link_down) {

    /*
     * Several variable-length strings go into "why", so the write is
     * bounded by the buffer size.  The remote node reported is the
     * target NIC that fma_find_verify_route() stored in the link_verify
     * member of the union; this is a link verify probe, so the
     * nic_verify member is not the one that was filled in.
     */
    snprintf(why, sizeof(why),
      "resp from down port x%dp%d, nic %d, port %d route=%s, reply=%s,"
      " remote=%s",
      pdp->u.link_verify.lv_xbar->x_topo_index,
      pdp->u.link_verify.lv_xbar_port,
      pdp->origin->nic_id, pdp->pd_port,
      lf_route_str(pdp->pd_route, pdp->pd_route_len),
      lf_route_str(pdp->packet.nic_scout.reply_route,
                   pdp->packet.nic_scout.reply_route_len_8),
      lf_node_string(LF_NODE(pdp->u.link_verify.lv_nic),
                     pdp->u.link_verify.lv_nic_port));

    fma_map_is_invalid(TRUE, why);
  }

  /* no re-probe timer may be outstanding when a response arrives */
  if (pdp->pd_timer != NULL) {
    LF_ERROR(("pd_timer should be NULL here!"));
  }

  /* start re-probe timer */
  if (asp->verify_interval > 0) {
    pdp->pd_timer = lf_schedule_event(fma_initiate_verify_probe, pdp,
                                      asp->verify_interval);
    if (pdp->pd_timer == NULL) LF_ERROR(("Error rescheduling verify probe"));
  }

  return;

 except:
  fma_perror_exit(1);
}

/*
 * Failure handler for a link verify probe (no response was received).
 *
 * If the link was expected to be up, the map is reported as invalid so
 * that a re-map is requested.  No attempt is made here to isolate which
 * link along the route actually failed.  In every case the probe is
 * rescheduled one verify interval from now, provided the interval is
 * non-zero.
 */
static void
fma_link_verify_failed(
  struct fma_probe_desc *pdp)
{
  struct fma_settings *asp;
  lf_string_t why;

  asp = A.settings;

  /*
   * If we expect the link to be up, report that this map is invalid
   */
  if (!pdp->u.link_verify.lv_link_down) {

    /*
     * Several variable-length strings go into "why", so the write is
     * bounded by the buffer size.  The remote node reported is the
     * target NIC that fma_find_verify_route() stored in the link_verify
     * member of the union; this is a link verify probe, so the
     * nic_verify member is not the one that was filled in.
     */
    snprintf(why, sizeof(why),
        "verify failed %s, nic %d, port %d route=%s reply=%s,"
        " remote=%s",
        lf_node_string(LF_NODE(pdp->u.link_verify.lv_xbar),
                       pdp->u.link_verify.lv_xbar_port),
        pdp->origin->nic_id, pdp->pd_port,
        lf_route_str(pdp->pd_route, pdp->pd_route_len),
        lf_route_str(pdp->packet.nic_scout.reply_route,
                     pdp->packet.nic_scout.reply_route_len_8),
        lf_node_string(LF_NODE(pdp->u.link_verify.lv_nic),
                       pdp->u.link_verify.lv_nic_port));

    /* request re-map */
    fma_map_is_invalid(FALSE, why);
  }

  /* no re-probe timer may be outstanding when a probe fails */
  if (pdp->pd_timer != NULL) {
    LF_ERROR(("pd_timer should be NULL here!"));
  }

  /* if interval still > 0, start re-probe timer */
  if (asp->verify_interval > 0) {
    pdp->pd_timer = lf_schedule_event(fma_initiate_verify_probe, pdp,
                                      asp->verify_interval);
    if (pdp->pd_timer == NULL) LF_ERROR(("Error rescheduling verify probe"));
  }

  return;

 except:
  fma_perror_exit(1);
}

/*
 * Find a NIC on the far side of this xbar link so we can test it.  We
 * ensure that this is a link between two xbars, and find which one is closer
 * to one of the NICs on this host.  We then search outward from the other
 * end, without crossing the link under test, until we find a NIC; that NIC
 * is the target for this verify probe.
 *
 * On success the following fields of pdp are filled in: origin, pd_port,
 * pd_route, pd_route_len, and u.link_verify.{lv_link_down, lv_nic,
 * lv_nic_port}.
 *
 * If all OK, return 0;
 * If a truly fatal error occurs, print error and exit.
 * If remap needed, return -1.
 * If we just can't do this probe and advise skipping it, return -2.
 *
 * NOTE(review): no path in the code below actually produces -1; the
 * only values returned are 0 and -2 (or the process exits).
 */
static int
fma_find_verify_route(
  struct fma_probe_desc *pdp)
{
  struct lf_fabric *fp;
  struct lf_xbar *txp;
  struct lf_xbar *xp1;
  struct lf_xbar *xp2;
  struct lf_xbar *work_list;
  struct lf_xbar *new_work_list;
  union lf_node *np;
  unsigned int serial;
  int old_link_state;
  int in_port;
  int tp;
  int p1;
  int p2;
  int fatal;

  fp = A.fabric;	/* NOTE(review): assigned but not used below */
  fatal = 0;		/* 0 means any LF_ERROR below exits the program */
  
  /* Get pointer to xbar/port being verified */
  xp1 = pdp->u.link_verify.lv_xbar;
  p1 = pdp->u.link_verify.lv_xbar_port;

  /* Make sure this end is an xbar */
  if (xp1->ln_type != LF_NODE_XBAR) {
    LF_ERROR(("Link endpoint 1 is not an xbar!"));
  }

  /* get the other end */
  xp2 = LF_XBAR(xp1->topo_ports[p1]);
  p2 = xp1->topo_rports[p1];

  /* Make sure there is a link, and that it is an xbar */
  if (xp2 == NULL) {
    LF_ERROR(("Requested to test missing link!"));
  }
  if (xp2->ln_type != LF_NODE_XBAR) {
    LF_ERROR(("Link endpoint 2 is not an xbar!"));
  }

  /* If either xbar is unreachable, we have no way to test this link.
   * This can happen while FMS is still resolving the map.
   */
  if (FMA_XBAR(xp1)->host_dist == -1
      || FMA_XBAR(xp2)->host_dist == -1) {
    if (A.debug) fma_log("Being asked to verify unreachable xbar");
    return -2;
  }

  /* Record the state of the link - if it is down now, we are looking
   * for it to go up!
   */
  pdp->u.link_verify.lv_link_down = (xp1->link_state[p1] != LF_LINK_STATE_UP);

  /* We have both xbars, make xp1 be the one closest to this host */
  if (FMA_XBAR(xp2)->host_dist < FMA_XBAR(xp1)->host_dist) {

    txp = xp1;	/* swap 'em */
    xp1 = xp2;
    xp2 = txp;
    tp = p1;
    p1 = p2;
    p2 = tp;
  }

  /* Save the starting NIC information */
  pdp->origin = FMA_XBAR(xp1)->host_nic;
  pdp->pd_port = FMA_XBAR(xp1)->host_nic_port;

  /* Find the closest NIC to xp2 that we can without using p2.
   * This is a BFS outwards from xp2 until we hit a NIC.
   * Port p2 is temporarily marked down so that the search loop, which
   * skips links that are not UP, never goes back across the link under
   * test.  The saved state is restored after the search.
   */
  old_link_state = xp2->link_state[p2];
  lf_set_xbar_link_state(xp2, p2, LF_LINK_STATE_DOWN);

  /* initial route to this xbar: the host's route to xp1, plus one hop
   * out of port p1 across the link being verified
   */
  memcpy(FMA_XBAR(xp2)->fx_route, FMA_XBAR(xp1)->host_route,
	 FMA_XBAR(xp1)->host_dist);
  FMA_XBAR(xp2)->fx_route[FMA_XBAR(xp1)->host_dist] =
    LF_DELTA_TO_ROUTE(p1 - FMA_XBAR(xp1)->host_in_port);
  FMA_XBAR(xp2)->fx_route_len = FMA_XBAR(xp1)->host_dist + 1;
  FMA_XBAR(xp2)->fx_in_port = xp1->topo_rports[p1];

  /* setup lists: work_list is the current BFS level, new_work_list
   * collects the xbars for the next level
   */
  work_list = xp2;
  FMA_XBAR(xp2)->bf_next = NULL;
  new_work_list =  NULL;

  pdp->u.link_verify.lv_nic = NULL;		/* no NIC yet */

  serial = ++A.myri->bfs_serial;	/* use serial numbers to test visited */
  FMA_XBAR(xp2)->bf_serial = serial;

  while (work_list != NULL) {
    char port_numbers [LF_MAX_XBAR_PORTS];
    struct lf_xbar *xp;
    int i;
    int p;

    /* pull the top xbar off the list */
    xp = work_list;
    work_list = FMA_XBAR(xp)->bf_next;

    /* check each port */
    in_port = FMA_XBAR(xp)->fx_in_port;

    fma_shuffle_port_order (xp, port_numbers);
    
    for (i=0; i<xp->num_ports; ++i) {

      /* get next port number from the shuffled list*/
      p = port_numbers [i];

      /* Don't violate quadrant disable */
      if (lf_xbar_qd_violation(xp, in_port, p)) continue;

      /* see where this link goes */
      np = xp->topo_ports[p];

      /* skip over down or disconnected links */
      if (np == NULL || xp->link_state[p] != LF_LINK_STATE_UP) continue;

      /* If this is a NIC, use this as target and exit loop */
      if (np->ln_type == LF_NODE_NIC) {

	/* Fill in route for probe: route to this xbar plus the hop
	 * out of port p
	 */
	memcpy(pdp->pd_route, FMA_XBAR(xp)->fx_route,
	       FMA_XBAR(xp)->fx_route_len);
	pdp->pd_route[FMA_XBAR(xp)->fx_route_len] =
	  LF_DELTA_TO_ROUTE(p - FMA_XBAR(xp)->fx_in_port);
	pdp->pd_route_len = FMA_XBAR(xp)->fx_route_len + 1;

	/* record the NIC we should find */
	pdp->u.link_verify.lv_nic = LF_NIC(np);
	pdp->u.link_verify.lv_nic_port = xp->topo_rports[p];

	goto search_done;

      /* If it is an xbar and not visited, add it to new work list */
      } else if (np->ln_type == LF_NODE_XBAR) {

	/* If visited, skip it */
	if (FMA_XBAR_N(np)->bf_serial == serial) continue;

	/* Not yet visited, build route to this xbar and add it to
	 * the new work list
	 */
	memcpy(FMA_XBAR_N(np)->fx_route, FMA_XBAR(xp)->fx_route,
	       FMA_XBAR(xp)->fx_route_len);
	FMA_XBAR_N(np)->fx_route[FMA_XBAR(xp)->fx_route_len] =
	  LF_DELTA_TO_ROUTE(p - FMA_XBAR(xp)->fx_in_port);
	FMA_XBAR_N(np)->fx_route_len = FMA_XBAR(xp)->fx_route_len + 1;
	FMA_XBAR_N(np)->fx_in_port = xp->topo_rports[p];

	FMA_XBAR_N(np)->bf_serial = serial;	/* mark as visited */

	/* Add to the next work list */
	FMA_XBAR_N(np)->bf_next = new_work_list;
	new_work_list = LF_XBAR(np);

      /* any other node type is an error */
      } else {
	LF_ERROR(("Bad node type"));
      }
    }

    /* If done with this level of BFS, advance to the next */
    if (work_list == NULL) {
      work_list = new_work_list;
      new_work_list = NULL;
    }
  }
 search_done:

  /* restore the old link state to the one we avoided */
  lf_set_xbar_link_state(xp2, p2, old_link_state);

  /* If no NIC found, we can't do this probe...
   * Setting fatal to -2 first makes the except path return -2 to the
   * caller instead of exiting.
   */
  if (pdp->u.link_verify.lv_nic == NULL) {
    fatal = -2;
    LF_ERROR(("No NIC found to service probe"));
  }

  return 0;

 except:
  /* fatal still 0: an unexpected error, so print it and exit */
  if (fatal == 0) {
    fma_perror_exit(1);
  }
  return fatal;
}

/*
 * Set up the probes that watch an empty port on a node for something
 * being attached to it.  Up to three probes are defined: an anonymous
 * (untagged) xbar probe, a tagged xbar probe, and a NIC probe.  Both xbar
 * flavours are used because quadrant disable may prevent tagged xbars
 * from responding to simple roundtrips.  Which xbar flavours are defined
 * depends on A.xbar_types.
 */
void
fma_define_empty_port_probe(
  union lf_node *np,
  int port)
{
  int unreachable_xbar;

  if (A.debug > 1) {
    fma_log("Watching empty port %s", lf_node_string(np, port));
  }

  /* an xbar with no route from this host cannot be probed */
  unreachable_xbar = (np->ln_type == LF_NODE_XBAR
                      && FMA_XBAR_N(np)->host_dist == -1);
  if (unreachable_xbar) {
    if (A.debug) {
      fma_log("Skipping empty port probe for %s", lf_node_string(np, port));
    }
    return;
  }

  /* anonymous probe, unless only ID-bearing xbars are in use */
  if (A.xbar_types != FMA_XT_ID_ONLY) {
    fma_define_one_empty_port_probe(np, port, FALSE);
  }

  /* tagged probe, unless no xbars have IDs */
  if (A.xbar_types != FMA_XT_NO_IDS) {
    fma_define_one_empty_port_probe(np, port, TRUE);
  }

  /* and one probe looking for a NIC on the port */
  fma_define_empty_nic_probe(np, port);
}

/*
 * Define a single persistent xbar-scout probe aimed at an empty port.
 *
 *   np     - node (xbar or NIC) that owns the empty port
 *   port   - the empty port on that node
 *   tagged - non-zero for a tagged xbar scout, zero for an anonymous one
 *
 * The probe goes onto the verify list and, when the verify interval is
 * non-zero, is scheduled at a random offset within one interval.  The
 * "except" path calls fma_perror_exit(1).
 */
static void
fma_define_one_empty_port_probe(
  union lf_node *np,
  int port,
  int tagged)
{
  struct fma_probe_desc *probe;
  struct fma_probe_desc *head;
  struct fma_settings *settings;
  int delay;
  int rc;

  settings = A.settings;
  head = A.myri->verify_anchor;

  /* get a descriptor and describe the probe in it */
  LF_CALLOC(probe, struct fma_probe_desc, 1);

  probe->pd_type = FMA_PT_EMPTY_PORT_CHECK_XBAR;
  probe->pd_timeout = settings->verify_timeout;
  probe->persistant = TRUE;
  probe->resp_rtn = fma_empty_port_resp;
  probe->fail_rtn = fma_empty_port_failed;
  probe->u.empty_port_verify.np = np;
  probe->u.empty_port_verify.port = port;

  /* choose the local NIC, port, and route used to reach the empty port */
  if (np->ln_type != LF_NODE_XBAR) {

    /* not an xbar: send straight out the port with a one-byte route */
    if (tagged) {
      probe->pd_route[0] = LF_TAGGED_XBAR_QUERY_ROUTE;
    } else {
      probe->pd_route[0] = LF_DELTA_TO_ROUTE(0);
    }
    probe->pd_route_len = 1;
    probe->origin = FMA_NIC_N(np)->local_nip;
    probe->pd_port = port;

  } else {
    rc = fma_find_empty_port_route(probe, LF_XBAR(np), port, tagged);
    if (rc < 0) {
      LF_FREE(probe);
      LF_ERROR(("Cannot build route to empty port"));
    }
  }

  /* insert right after the anchor of the verify list */
  probe->user_prev = head;
  probe->user_next = head->user_next;
  head->user_next->user_prev = probe;
  head->user_next = probe;

  /* build the packet; the serial number is filled in at send time */
  if (!tagged) {
    fma_fill_xbar_scout(&probe->packet.xbar_scout_pkt,
                        probe->origin, probe->pd_port, 0);
  } else {
    fma_fill_tagged_xbar_scout(&probe->packet.xbar_scout_pkt,
                               probe->origin, probe->pd_port, 0);
  }
  probe->pkt_len = sizeof(probe->packet.xbar_scout_pkt);

#if 0
  fma_log("probe %p empty port, %s route=%s",
    probe, lf_node_string(np, port),
    lf_route_str(probe->pd_route, probe->pd_route_len));
#endif

  /* first send happens at a random offset inside one verify interval */
  if (settings->verify_interval > 0) {
    delay = random() % settings->verify_interval;
    probe->pd_timer = lf_schedule_event(fma_initiate_verify_probe, probe,
                                        delay);
    if (probe->pd_timer == NULL) {
      LF_ERROR(("Error scheduling empty port probe"));
    }
  }
  return;

 except:
  fma_perror_exit(1);
}

/*
 * Define a persistent NIC-scout probe that watches an empty port for a
 * newly attached NIC.
 *
 *   np   - node (xbar or NIC) that owns the empty port
 *   port - the empty port on that node
 *
 * The response and failure handlers are the same ones used by the
 * empty-port xbar probes.  The "except" path calls fma_perror_exit(1).
 */
static void
fma_define_empty_nic_probe(
  union lf_node *np,
  int port)
{
  struct fma_probe_desc *probe;
  struct fma_probe_desc *head;
  struct fma_settings *settings;
  int delay;
  int rc;

  settings = A.settings;
  head = A.myri->verify_anchor;

  /* get a descriptor and describe the probe in it */
  LF_CALLOC(probe, struct fma_probe_desc, 1);

  probe->pd_type = FMA_PT_EMPTY_PORT_CHECK_NIC;
  probe->pd_timeout = settings->verify_timeout;
  probe->persistant = TRUE;
  probe->resp_rtn = fma_empty_port_resp;
  probe->fail_rtn = fma_empty_port_failed;
  probe->u.empty_port_verify.np = np;
  probe->u.empty_port_verify.port = port;

  /* choose the local NIC, port, and route used to reach the empty port */
  if (np->ln_type != LF_NODE_XBAR) {

    /* not an xbar: send straight out the port with an empty route */
    probe->pd_route_len = 0;
    probe->origin = FMA_NIC_N(np)->local_nip;
    probe->pd_port = port;

  } else {
    rc = fma_find_empty_nic_route(probe, LF_XBAR(np), port);
    if (rc < 0) {
      LF_FREE(probe);
      LF_ERROR(("Cannot build route to empty nic"));
    }
  }

  /* insert right after the anchor of the verify list */
  probe->user_prev = head;
  probe->user_next = head->user_next;
  head->user_next->user_prev = probe;
  head->user_next = probe;

  /* build the packet; the serial number is filled in at send time */
  fma_fill_nic_verify_scout(&probe->packet.nic_scout, probe->origin,
                            probe->pd_port, 0,
                            probe->pd_route, probe->pd_route_len);
  probe->pkt_len = sizeof(probe->packet.nic_scout);

#if 0
  {
    fma_log("probe %p empty nic, route=%s",
      probe, lf_node_string(np, port),
      lf_route_str(probe->pd_route, probe->pd_route_len));
  }
#endif

  /* first send happens at a random offset inside one verify interval */
  if (settings->verify_interval > 0) {
    delay = random() % settings->verify_interval;
    probe->pd_timer = lf_schedule_event(fma_initiate_verify_probe, probe,
                                        delay);
    if (probe->pd_timer == NULL) {
      LF_ERROR(("Error scheduling empty port probe"));
    }
  }
  return;

 except:
  fma_perror_exit(1);
}


/*
 * Failure handler for empty-port probes.  Getting no answer from an
 * empty port is the expected outcome, so the probe is only rescheduled
 * (when the verify interval is non-zero).
 */
static void
fma_empty_port_failed(
  struct fma_probe_desc *pdp)
{
  struct fma_settings *settings = A.settings;

  /* no re-probe timer may be outstanding at this point */
  if (pdp->pd_timer != NULL) {
    LF_ERROR(("pd_timer should be NULL here, serial=%d", pdp->pd_serial));
  }

  /* with no positive interval the probe is left idle */
  if (settings->verify_interval <= 0) {
    return;
  }

  pdp->pd_timer = lf_schedule_event(fma_initiate_verify_probe, pdp,
                                    settings->verify_interval);
  if (pdp->pd_timer == NULL) {
    LF_ERROR(("Error rescheduling verify probe"));
  }
  return;

 except:
  fma_perror_exit(1);
}

/*
 * Response handler for empty-port probes.  Something answered on a port
 * the map records as empty, so the map is declared invalid.  The probe
 * is then rescheduled (when the verify interval is non-zero).
 *
 * vpkt and len (the reply and its length) are not examined.
 */
static void
fma_empty_port_resp(
  struct fma_probe_desc *pdp,
  void *vpkt,
  int len)
{
  struct fma_settings *settings = A.settings;
  lf_string_t reason;

  sprintf(reason, "empty port %s is no longer empty",
          lf_node_string(pdp->u.empty_port_verify.np,
                         pdp->u.empty_port_verify.port));

  /* only flag the map here; this probe gets cleaned up later */
  fma_map_is_invalid(TRUE, reason);

  /* no re-probe timer may be outstanding at this point */
  if (pdp->pd_timer != NULL) {
    LF_ERROR(("pd_timer should be NULL here, serial=%d", pdp->pd_serial));
  }

  /* with no positive interval the probe is left idle */
  if (settings->verify_interval <= 0) {
    return;
  }

  pdp->pd_timer = lf_schedule_event(fma_initiate_verify_probe, pdp,
                                    settings->verify_interval);
  if (pdp->pd_timer == NULL) {
    LF_ERROR(("Error rescheduling verify probe"));
  }
  return;

 except:
  fma_perror_exit(1);
}

/*
 * Build the round-trip route used to test a supposedly disconnected port
 * on an xbar.
 *
 * The route is: the host's route to xp, one hop out of "port", one query
 * byte (tagged or anonymous), then the reverse of the outbound hops.
 * pdp->origin and pdp->pd_port are set to the host NIC and NIC port
 * recorded for xp.  Always returns 0.
 */
static int
fma_find_empty_port_route(
  struct fma_probe_desc *pdp,
  struct lf_xbar *xp,
  int port,
  int tagged)
{
  int hops;

  /* number of route bytes needed to reach xp from this host */
  hops = FMA_XBAR(xp)->host_dist;

  /* the probe starts from the NIC/port through which xp is reached */
  pdp->origin = FMA_XBAR(xp)->host_nic;
  pdp->pd_port = FMA_XBAR(xp)->host_nic_port;

  /* outbound: route to xp, then out of the port being tested */
  memcpy(pdp->pd_route, FMA_XBAR(xp)->host_route, hops);
  pdp->pd_route[hops] = LF_DELTA_TO_ROUTE(port - FMA_XBAR(xp)->host_in_port);

  /* query byte for whatever might be at the far end of the port */
  if (tagged) {
    pdp->pd_route[hops + 1] = LF_TAGGED_XBAR_QUERY_ROUTE;
  } else {
    pdp->pd_route[hops + 1] = LF_DELTA_TO_ROUTE(0);
  }

  /* inbound: the reverse of the hops+1 outbound bytes */
  lf_reverse_route(pdp->pd_route + hops + 2, pdp->pd_route, hops + 1);

  /* (hops+1) out, 1 query byte, (hops+1) back */
  pdp->pd_route_len = 2 * hops + 3;

  return 0;
}

/*
 * Find a route to test for a nic being connected to a supposedly
 * disconnected port on an xbar.
 *
 * On success pdp->origin, pdp->pd_port, pdp->pd_route and
 * pdp->pd_route_len are filled in and 0 is returned.
 *
 * Returns -1, leaving pdp untouched, when "port" is the port through
 * which this host reaches the xbar.  That condition used to fall through
 * to "return 0", so the caller's failure check could never fire and the
 * probe went on with no origin or route set.
 */
static int
fma_find_empty_nic_route(
  struct fma_probe_desc *pdp,
  struct lf_xbar *xp,
  int port)
{
  int dist;

  /* the port we arrive on is in use by definition, so it cannot be empty */
  if (port == FMA_XBAR(xp)->host_in_port) {
    LF_ERROR(("tried to verify my in port"));
  }

  /* Save the starting NIC information */
  pdp->origin = FMA_XBAR(xp)->host_nic;
  pdp->pd_port = FMA_XBAR(xp)->host_nic_port;

  /* route to this xbar and then out to a nic at the port */
  dist = FMA_XBAR(xp)->host_dist;
  memcpy(pdp->pd_route, FMA_XBAR(xp)->host_route, dist);
  pdp->pd_route[dist] = LF_DELTA_TO_ROUTE(port - FMA_XBAR(xp)->host_in_port);
  pdp->pd_route_len = dist + 1;

  return 0;

 except:
  /* report the failure so the caller's rc < 0 check takes effect */
  return -1;
}

/*
 * Define a persistent NIC verify probe from one of this host's NIC
 * ports to a port on another NIC.
 *
 *   nip      - local NIC the probe is sent from
 *   lcl_port - port on the local NIC
 *   nicp     - NIC to be verified
 *   rmt_port - port on that NIC
 *
 * The route comes from the route table stored with nicp.  The node
 * found by following that route through the map is recorded as the NIC
 * and port expected to answer.  The "except" path calls
 * fma_perror_exit(1).
 */
void
fma_define_host_probe(
  struct fma_nic_info *nip,
  int lcl_port,
  struct lf_nic *nicp,
  int rmt_port)
{
  struct fma_probe_desc *probe;
  struct fma_probe_desc *head;
  struct fma_settings *settings;
  union lf_node *far_node;
  unsigned char *route;
  int route_index;
  int route_len;
  int far_port;
  int delay;

  settings = A.settings;
  head = A.myri->verify_anchor;

  if (A.debug) {
    fma_log("host probe from NIC %dp%d to %s",
            nip->nic_index, lcl_port,
            lf_node_string(LF_NODE(nicp), rmt_port));
  }

  /* look up the stored route for this (local NIC, local port, remote port) */
  route_index = FMA_ROUTE_INDEX(nip->nic_index, lcl_port, rmt_port);
  route_len = FMA_NIC(nicp)->route_lens[route_index];
  if (route_len == -1) {
    LF_ERROR(("No route to %s from NIC %d, port %d",
          lf_node_string(LF_NODE(nicp), rmt_port),
          nip->nic_index, lcl_port));
  }
  route = FMA_NIC(nicp)->route_buf + FMA_ROUTE_OFFSET(route_index);

  /* create the scout probe along that route */
  probe = fma_create_nic_scout_probe(nip, lcl_port, FMA_PT_NIC_VERIFY,
                                     fma_nic_probe_resp, fma_nic_probe_failed,
                                     route, route_len);

  probe->pd_timeout = settings->verify_timeout;
  probe->persistant = TRUE;

  /* work out which NIC and port the map puts at the end of the route */
  far_node = lf_follow_route(LF_NODE(probe->origin->nic_ptr), probe->pd_port,
                             probe->pd_route, probe->pd_route_len, &far_port);
  if (far_node == NULL || far_node->ln_type != LF_NODE_NIC) {
    LF_ERROR(("Expected NIC at other end of route"));
  }

  probe->u.nic_verify.nv_nic = LF_NIC(far_node);
  probe->u.nic_verify.nv_port = far_port;

  /* insert right after the anchor of the verify list */
  probe->user_prev = head;
  probe->user_next = head->user_next;
  head->user_next->user_prev = probe;
  head->user_next = probe;

  /* first send happens at a random offset inside one verify interval */
  if (settings->verify_interval > 0) {
    delay = random() % settings->verify_interval;
    probe->pd_timer = lf_schedule_event(fma_initiate_verify_probe, probe,
                                        delay);
    if (probe->pd_timer == NULL) {
      LF_ERROR(("Error scheduling nic verify probe"));
    }
  }
  return;

 except:
  fma_perror_exit(1);
}

/*
 * Failure handler for NIC verify probes: the NIC did not answer, so the
 * map is reported as invalid and a re-map is requested.  The probe is
 * then rescheduled (when the verify interval is non-zero).
 */
static void
fma_nic_probe_failed(
  struct fma_probe_desc *pdp)
{
  struct fma_settings *settings = A.settings;
  lf_string_t reason;

  /* log the unexpected timer before the hard check further down */
  if (pdp->pd_timer != NULL) {
    fma_log("%p: timeout timer still active??", pdp);
  }

  /* request re-map */
  sprintf(reason, "%s did not respond to probe",
          lf_node_string(LF_NODE(pdp->u.nic_verify.nv_nic),
                         pdp->u.nic_verify.nv_port));
  fma_map_is_invalid(FALSE, reason);

  /*
   * Restart the probe even though the map was just called invalid: we
   * might still be in the map grace period, and thus be ignored.
   * With a zero interval the probe just stays on the list.
   */
  if (pdp->pd_timer != NULL) {
    LF_ERROR(("pd_timer should be NULL here!"));
  }

  if (settings->verify_interval <= 0) {
    return;
  }

  pdp->pd_timer = lf_schedule_event(fma_initiate_verify_probe, pdp,
                                    settings->verify_interval);
  if (pdp->pd_timer == NULL) {
    LF_ERROR(("Error rescheduling NIC probe"));
  }
  return;

 except:
  fma_perror_exit(1);
}

/*
 * We got a response from a nic. see if it's changed somehow.
 * this function could be a no-op, except we want to support non verifying
 * nics, these need the be checked for changes as well as for still being
 * there.
 */
static void
fma_nic_probe_resp(
  struct fma_probe_desc *pdp,
  void *vpkt,
  int len)
{
  struct myri_nic_scout_reply *pkt;
  struct lf_nic *nicp;
  struct fma_settings *asp;
  struct fma_nic_verify_scout_opaque_data *sodp;
  struct fma_nic_reply_opaque_data *rodp;
  int invalid;
  lf_string_t why;

  pkt = (struct myri_nic_scout_reply *) vpkt;
  sodp = (struct fma_nic_verify_scout_opaque_data *) &pkt->opaque_scout_data;
  rodp = (struct fma_nic_reply_opaque_data *) &pkt->opaque_reply_data;
  asp = A.settings;
  nicp = pdp->u.nic_verify.nv_nic;


  if (A.debug) {
    fma_log("got resp from %s, flags=%x",
	lf_node_string(LF_NODE(pdp->u.nic_verify.nv_nic),
	             pdp->u.nic_verify.nv_port),
	nicp->host->fma_flags);
    if (nicp->host->fma_flags & FMA_FLAG_CAN_DO_LAG) {
      fma_log("  LAG_ID = %s", lf_lag_id_string(rodp->nr_lag_id));
    }
  }
  
  invalid = FALSE;
  if (LF_MAC_CMP (pkt->mac_addr, nicp->mac_addr) ||
      pkt->port_8 != pdp->u.nic_verify.nv_port) {
    invalid = TRUE;
    sprintf(why, "expected reply from %s"
	    " got %s p%d instead", 
	    lf_node_string(LF_NODE(nicp), pdp->u.nic_verify.nv_port),
	    fma_mac_to_hostname(pkt->mac_addr), pkt->port_8);

  } else if (ntohl(rodp->map_version_32) != A.map_info->mi_map_version) {
    invalid = TRUE;
    sprintf(why, "%s changed its map version to %d",
	    lf_node_string(LF_NODE(nicp), pdp->u.nic_verify.nv_port),
	    htonl (rodp->map_version_32));

  } else if (LF_MAC_CMP(rodp->mapper_mac_addr,
	                A.map_info->mi_mapper_mac_addr) != 0) {
    invalid = TRUE;
    sprintf(why, "%s has mapper address of %s, expected %s", 
	    lf_node_string(LF_NODE(nicp), pdp->u.nic_verify.nv_port),
	    fma_mac_to_hostname(rodp->mapper_mac_addr),
	    fma_mac_to_hostname(A.map_info->mi_mapper_mac_addr));

  } else if ((nicp->host->fma_flags & FMA_FLAG_CAN_DO_LAG) &&
             !lf_lag_match(rodp->nr_lag_id, nicp->nic_lag_id)) {
    invalid = TRUE;
    sprintf(why, "%s changed its LAG_ID",
	lf_node_string(LF_NODE(nicp), pdp->u.nic_verify.nv_port));
    if (A.debug) {
      fma_log("LAG_ID was %s", lf_lag_id_string(nicp->nic_lag_id));
      fma_log("LAG_ID is %s", lf_lag_id_string(rodp->nr_lag_id));
    }
  }
  
  /* If non-zero interval, start re-probe timer,
   * else just hang out on the list.  We restart even if map invalid because
   * we might still be in map grace period, and thus be ignored.
   */
  if (pdp->pd_timer != NULL) {
    LF_ERROR(("pd_timer should be NULL here!"));
  }
  if (asp->verify_interval > 0) {
    pdp->pd_timer = lf_schedule_event(fma_initiate_verify_probe, pdp,
	asp->verify_interval);
    if (pdp->pd_timer == NULL) LF_ERROR(("Error rescheduling NIC probe"));
  }
  
  /* If map invalid, report it */
  if (invalid) {
    fma_map_is_invalid(TRUE, why);
  }
  return;

 except:
  fma_perror_exit(1);
}


#if 0
void
dumpit(
  struct lf_fabric *fp)
{
  lf_string_t s;
  int off;
  int i,n;
  int x;

  fma_log("Fabric loaded, %d hosts", fp->num_hosts);
  for (i=0; i<fp->num_hosts; ++i) {
   lf_host_t *hp;
   hp = fp->hosts[i];
   if (hp == NULL) { fma_log("%d: null", i); continue; }
   fma_log("\"%s\", %d NICs", hp->hostname, hp->num_nics);
   for (n=0; n<hp->num_nics;++n) {
     lf_nic_t *np;
     np = hp->nics[n];
     fma_log("\t%d " LF_MAC_FORMAT " sn=%s, pn=%s", n,
       LF_MAC_ARGS(np->mac_addr),
       np->serial_no, np->product_id);
   }
  }

  fma_log("%d xbars", fp->num_xbars);
  for (x=0; x<fp->num_xbars; ++x) {
    struct lf_xbar *xp;
    int p;

    xp = LF_XBAR(fp->xbars[x]);
    fma_log("      xbar[%d] - xid=%d, %d ports, clos=%d, dist=%d", x,
	xp->xbar_id, xp->num_ports,
	xp->clos_level,
	FMA_XBAR(xp)->host_dist);
    for (p=0; p<xp->num_ports; ++p) {
      union lf_node *onp;

      off = sprintf(s, "       %d -", p);

      onp = xp->topo_ports[p];
      if (onp == NULL) {
	fma_log(s);
	continue;
      }

      if (onp->ln_type == LF_NODE_XBAR) {
	struct lf_xbar *oxp;
	oxp = LF_XBAR(onp);
	off += sprintf(s+off, " %s, clos=%d",
	        lf_node_string(LF_NODE(oxp), xp->topo_rports[p]),
		oxp->clos_level);

      } else if (onp->ln_type == LF_NODE_NIC) {
	struct lf_nic *onicp;
	onicp = LF_NIC(onp);
	off += sprintf(s+off, " %s", lf_node_string(LF_NODE(onicp),
						    xp->topo_rports[p]));
      } else {
	off += sprintf(s+off, " ????");
      }

      fma_log("%s)", s);
    }
  }
}
#endif

/*
 * This is called when verify_interval is set from 0 to something non-zero.
 * It walks the verify prove list looking for inactive probes and
 * restarts them.
 */
void
fma_new_verify_interval()
{
  struct fma_probe_desc *pdp;
  struct fma_settings *asp;
  int first_interval;

  asp = A.settings;

  /* If the interval is being reset, nothing to do right now */
  if (asp->verify_interval <= 0) {
    return;
  }

  pdp = A.myri->verify_anchor->user_next;
  while (pdp != A.myri->verify_anchor) {

    /* only restart disabled probes */
    if (pdp->pd_sync_timer == NULL && pdp->pd_timer == NULL) {
      first_interval = random() % asp->verify_interval;
      pdp->pd_timer = lf_schedule_event(fma_initiate_verify_probe, pdp,
	  first_interval);
      if (pdp->pd_timer == NULL) LF_ERROR(("Error rescheduling verify probe"));
    }

    pdp = pdp->user_next;
  }
  return;

 except:
  fma_perror_exit(1);
}

/*
 * Choose which NIC ports this host probes with verify probes.
 *
 * Targets fall into 2 sets: NICs that can verify, and NICs that cannot.
 * Probes among verify-capable NICs must be assigned so that no
 * mutually-verifying subset exists that could vanish all at once and so
 * never be noticed (e.g. 2 NICs verifying only each other in a 100 node
 * fabric that both go down at about the same time).  Probes of
 * non-verifiers are spread evenly over all the verify-capable nodes.
 *
 * For each of our NIC ports we compute the ordered list of NIC ports
 * reachable from it (a "sub-fabric").  The list is the same, in the same
 * order, on every node of the sub-fabric, because the order comes from
 * the topo map and every node receives the same topo map.
 *
 *  - Each verify-capable node verifies the verify-capable node that
 *    immediately follows it in the list, wrapping at the end.
 *  - With V verify-capable nodes, the Nth verify-capable node verifies
 *    every Vth non-verify-capable node, starting with the Nth one.
 *
 * Also records the sub-fabric port and verifier counts in
 * nip->sf_ports[] and nip->sf_verifiers[] for each local port.
 */
void
fma_create_nic_verify_probes(
  struct lf_fabric *fp)
{
  struct lf_nic **members = NULL;	/* NIC of each sub-fabric entry */
  int *member_ports = NULL;		/* port number of each entry */
  int num_members = 0;
  int num_verifiers = 0;
  int nic_idx;

  /* room for every port of every NIC in the fabric */
  LF_CALLOC(members, struct lf_nic *,
            FMA_FABRIC(fp)->num_nics * FMA_FABRIC(fp)->max_nic_ports);
  LF_CALLOC(member_ports, int,
            FMA_FABRIC(fp)->num_nics * FMA_FABRIC(fp)->max_nic_ports);

  /* start with no local port belonging to any computed sub-fabric */
  fma_clear_subfab_indices();

  for (nic_idx = 0; nic_idx < A.myri->num_nics; ++nic_idx) {
    struct fma_nic_info *nip = A.myri->nic_info[nic_idx];
    int port;

    for (port = 0; port < nip->myri_info.num_ports; ++port) {
      int self;		/* our position in the member list */
      int self_ver;	/* our position among the verifiers */
      int cand;
      int nonver_seen;
      int i;

      /*
       * An index of -1 means this port is not in the member list computed
       * most recently, so the list has to be rebuilt for it.
       */
      if (nip->sf_port_index[port] == -1) {
        num_members = fma_find_subfab_participants(fp, nip, port,
                                                   members, member_ports,
                                                   &num_verifiers);
      }

      self = nip->sf_port_index[port];
      self_ver = nip->sf_ver_index[port];

      /* remember the sub-fabric size and verifier count for this port */
      nip->sf_ports[port] = num_members;
      nip->sf_verifiers[port] = num_verifiers;

      if (A.debug) {
        fma_log("NIC %d, p%d shares subfabric with %d ports, %d verifiers",
            nic_idx, port, num_members, num_verifiers);
        fma_log("  port index=%d, verifier index=%d", self, self_ver);
      }

      /* without verifiers there is nothing to assign (and no modulus) */
      if (num_verifiers == 0) {
        if (A.debug) fma_log("num_subfab_verifier_ports is 0 ?");
        continue;
      }

      /*
       * Step forward through the list, wrapping at the end, until we hit
       * a verify-capable entry or arrive back at our own slot.
       */
      cand = self;
      for (;;) {
        ++cand;
        if (cand >= num_members) cand = 0;
        if (members[cand]->host->fma_flags & FMA_FLAG_CAN_VERIFY) {
          break;
        }
        if (cand == self) {
          break;
        }
      }

      /* ending on our own slot means there is no other verifier to probe */
      if (cand != self) {
        fma_define_host_probe(nip, port, members[cand], member_ports[cand]);
      }

      /*
       * Number the non-verify-capable entries in list order and take
       * every one whose number modulo V equals our verifier index.
       */
      nonver_seen = 0;
      for (i = 0; i < num_members; ++i) {
        struct lf_nic *peer = members[i];

        if (peer->host->fma_flags & FMA_FLAG_CAN_VERIFY) {
          continue;
        }

        if ((nonver_seen % num_verifiers) == self_ver) {
          fma_define_host_probe(nip, port, peer, member_ports[i]);
        }
        ++nonver_seen;
      }
    }
  }

  LF_FREE(member_ports);
  LF_FREE(members);
  return;

 except:
  LF_FREE(member_ports);
  LF_FREE(members);
  fma_perror_exit(1);
}


/*
 * Reset the sub-fabric port index of every port of every NIC listed in
 * A.myri->nic_info.  The value -1 marks a port as not being part of the
 * current sub-fabric.
 */
void
fma_clear_subfab_indices()
{
  int nic_idx;

  for (nic_idx = 0; nic_idx < A.myri->num_nics; ++nic_idx) {
    struct fma_nic_info *info = A.myri->nic_info[nic_idx];
    int port = 0;

    while (port < info->myri_info.num_ports) {
      info->sf_port_index[port] = -1;
      ++port;
    }
  }
}

/*
 * Build the ordered array of every NIC port on the same sub-fabric as
 * local port lcl_port of NIC nip.
 *
 * Hosts, their NICs and the NIC ports are visited in fabric order.  A port
 * is added when the route length recorded for it from (nip, lcl_port) is
 * non-negative, or when it is that very local port.  All local sub-fabric
 * indices are cleared first; for each added port that belongs to a local
 * NIC, its position in the list and the number of verifiers counted so far
 * are stored in that NIC's sf_port_index[] / sf_ver_index[].
 *
 * fp                        - fabric to scan
 * nip, lcl_port             - local NIC and port defining the sub-fabric
 * subfab_nics               - out: NIC of each list entry
 * subfab_nic_ports          - out: port number of each list entry
 * num_subfab_verifier_ports - out: number of entries whose host has
 *                             FMA_FLAG_CAN_VERIFY set
 *
 * Returns the number of entries written to the two arrays.
 */
int
fma_find_subfab_participants(
  struct lf_fabric *fp,
  struct fma_nic_info *nip,
  int lcl_port,
  struct lf_nic **subfab_nics,
  int *subfab_nic_ports,
  int *num_subfab_verifier_ports)
{
  int count = 0;	/* entries added so far */
  int verifiers = 0;	/* verify-capable entries added so far */
  int host_idx;

  /* forget the results of any earlier sub-fabric computation */
  fma_clear_subfab_indices();

  for (host_idx = 0; host_idx < fp->num_hosts; ++host_idx) {
    struct lf_host *host = fp->hosts[host_idx];
    int nic_idx;

    for (nic_idx = 0; nic_idx < host->num_nics; ++nic_idx) {
      struct lf_nic *nic = host->nics[nic_idx];
      int port;

      for (port = 0; port < nic->num_ports; ++port) {
        int route_idx = FMA_ROUTE_INDEX(nip->nic_index, lcl_port, port);
        int route_len = FMA_NIC(nic)->route_lens[route_idx];
        struct fma_nic_info *owner = FMA_NIC(nic)->local_nip;
        int is_self = (owner == nip && port == lcl_port);

        /* skip ports with no good route, unless it is the port itself */
        if (route_len < 0 && !is_self) {
          continue;
        }

        /* a local NIC records where it sits in the list */
        if (owner != NULL) {
          owner->sf_port_index[port] = count;
          owner->sf_ver_index[port] = verifiers;
        }

        /* append this NIC port to the sub-fabric arrays */
        subfab_nics[count] = nic;
        subfab_nic_ports[count] = port;
        ++count;

        if (host->fma_flags & FMA_FLAG_CAN_VERIFY) {
          ++verifiers;
        }
      }
    }
  }

  *num_subfab_verifier_ports = verifiers;
  return count;
}

/*
 * Create empty port probes.  For each local NIC port, list all the empty
 * xbar ports on its sub-fabric and probe every Vth one starting with entry
 * N, where V is the number of verifiers recorded for the port
 * (sf_verifiers[]) and N is the port's verifier index (sf_ver_index[]).
 *
 * The scheme mirrors the one used to pick other nodes to verify.
 */
void
fma_create_empty_xbar_port_probes(
  struct lf_fabric *fp)
{
  struct lf_xbar **empty_xbars = NULL;	/* xbar of each empty-port entry */
  int *empty_ports = NULL;		/* port number of each entry */
  int nic_idx;

  /*
   * Scratch arrays for the empty xbar ports of one sub-fabric.
   * NOTE(review): fma_find_subfab_empty_ports() can add up to
   * 2 * (16 - num_ports) extra entries for an xbar whose xbar_id is 0;
   * confirm that 2 * mi_most_ports entries per xbar always covers that.
   */
  LF_CALLOC(empty_xbars, struct lf_xbar *,
            fp->num_xbars * A.map_info->mi_most_ports * 2);
  LF_CALLOC(empty_ports, int,
            fp->num_xbars * A.map_info->mi_most_ports * 2);

  for (nic_idx = 0; nic_idx < A.myri->num_nics; ++nic_idx) {
    struct fma_nic_info *nip = A.myri->nic_info[nic_idx];
    int nic_port;

    for (nic_port = 0; nic_port < nip->myri_info.num_ports; ++nic_port) {
      int num_empty;
      int ver_index;
      int verifiers;
      int slot;

      /* collect the empty xbar ports reachable from this NIC port */
      num_empty = fma_find_subfab_empty_ports(fp, nip, nic_port,
                                              empty_xbars, empty_ports);

      /* values stored earlier for this port */
      ver_index = nip->sf_ver_index[nic_port];
      verifiers = nip->sf_verifiers[nic_port];

      /* no verifiers: nothing to assign, and no valid modulus below */
      if (verifiers == 0) {
        if (A.debug) fma_log("num_verifiers is 0 ?");
        continue;
      }

      if (A.debug) {
        fma_log("NIC %d, p%d - %d empty ports on subfabric",
            nic_idx, nic_port, num_empty);
      }

      /* take the entries whose position modulo V is our verifier index */
      for (slot = 0; slot < num_empty; ++slot) {
        if ((slot % verifiers) != ver_index) {
          continue;
        }
        fma_define_empty_port_probe(LF_NODE(empty_xbars[slot]),
                                    empty_ports[slot]);
      }
    }
  }

  /* release the scratch arrays */
  LF_FREE(empty_ports);
  LF_FREE(empty_xbars);
  return;

 except:
  LF_FREE(empty_ports);
  LF_FREE(empty_xbars);
  fma_perror_exit(1);
}

/*
 * Collect every empty xbar port on the same sub-fabric as a NIC port.
 * An xbar is taken to be on the sub-fabric when its xbar_dist entry for
 * this NIC port is not -1.
 *
 * A port is listed when its topo link is NULL or its link state is
 * LF_LINK_STATE_DOWN.  For anonymous xbars (xbar_id == 0) the absolute
 * port numbers are unknown, so if the xbar shows N ports, 16-N port
 * numbers below 0 and 16-N port numbers past the last one are listed too.
 *
 * fp                - fabric to scan
 * nip, nic_port     - local NIC and port defining the sub-fabric
 * subfab_xbars      - out: xbar of each list entry
 * subfab_xbar_ports - out: port number of each list entry
 *
 * Returns the number of entries written to the two arrays.
 */
static int
fma_find_subfab_empty_ports(
  struct lf_fabric *fp,
  struct fma_nic_info *nip,
  int nic_port,
  struct lf_xbar **subfab_xbars,
  int *subfab_xbar_ports)
{
  /* position of this NIC port in each xbar's xbar_dist array */
  int dist_index = FMA_NICPORT_INDEX(nip->nic_index, nic_port);
  int found = 0;
  int xi;

  for (xi = 0; xi < fp->num_xbars; ++xi) {
    struct lf_xbar *xbar = fp->xbars[xi];
    int port;
    int pad;

    /* xbars with a distance of -1 are not on this sub-fabric */
    if (FMA_XBAR(xbar)->xbar_dist[dist_index] == -1) {
      continue;
    }

    /* list the ports that are unconnected or whose link is down */
    for (port = 0; port < xbar->num_ports; ++port) {
      if (xbar->topo_ports[port] != NULL
          && xbar->link_state[port] != LF_LINK_STATE_DOWN) {
        continue;
      }
      subfab_xbars[found] = xbar;
      subfab_xbar_ports[found] = port;
      ++found;
    }

    /* only anonymous xbars need the extra, possibly-existing ports */
    if (xbar->xbar_id != 0) {
      continue;
    }

    /* one port number below 0 and one past the end per missing port */
    for (pad = 0; pad < 16 - xbar->num_ports; ++pad) {
      subfab_xbars[found] = xbar;
      subfab_xbar_ports[found] = -(pad + 1);
      ++found;

      subfab_xbars[found] = xbar;
      subfab_xbar_ports[found] = xbar->num_ports + pad;
      ++found;
    }
  }

  return found;
}

/*
 * Make probe assignments.
 *
 * Defines a link verify probe for every xbar port whose assigned verifier
 * NIC belongs to this host, then sets up the NIC verify probes, the empty
 * xbar port probes and the disconnected NIC port probes, in that order.
 */
void
fma_assign_verifies()
{
  struct lf_fabric *fabric = A.fabric;
  int links = 0;	/* link probes successfully defined */
  int xi;

  for (xi = 0; xi < fabric->num_xbars; ++xi) {
    struct lf_xbar *xbar = fabric->xbars[xi];
    int port;

    for (port = 0; port < xbar->num_ports; ++port) {
      struct lf_nic *ver_nic = xbar->verifiers[port].ver_nicp;

      /* no verifier NIC, or the LF_VERIFY_OTHER_END marker: not ours */
      if (ver_nic == NULL || ver_nic == LF_VERIFY_OTHER_END) {
        continue;
      }

      /* the verifier is a NIC on some other host */
      if (ver_nic->host != A.my_host) {
        continue;
      }

      if (A.debug > 1) {
        fma_log("%s assigned to %s",
            lf_node_string(LF_NODE(xbar), port),
            lf_node_string(LF_NODE(ver_nic), xbar->verifiers[port].ver_port));
      }

      /* only probes defined successfully (return of 0) are counted */
      if (fma_define_link_verify_probe(xbar, port) == 0) {
        ++links;
      }
    }
  }
  if (A.debug) fma_log("Verifying %d links", links);

  /* verify probes for other NICs in the fabric */
  fma_create_nic_verify_probes(fabric);

  /* probes of empty xbar ports */
  fma_create_empty_xbar_port_probes(fabric);

  /* one probe for each disconnected NIC port */
  fma_create_disconnected_nic_probes();
}

/*
 * Start a verify going on each disconnected NIC port.
 *
 * Walks every NIC in A.myri->nic_info and defines an empty-port probe for
 * each port whose topo link is NULL.  A NIC whose nic_ptr is NULL is
 * skipped: there is no node to hand to fma_define_empty_port_probe(), and
 * reading topo_ports through a NULL pointer would crash.
 */
static void
fma_create_disconnected_nic_probes(void)
{
  struct fma_nic_info *nip;
  struct lf_nic *nicp;
  int p;
  int n;

  /* Check each of our NICs to see if any disconnected */
  for (n=0; n<A.myri->num_nics; ++n) {
    nip = A.myri->nic_info[n];
    nicp = nip->nic_ptr;

    /*
     * nicp might be NULL; never dereference it in that case.
     * NOTE(review): such a NIC is arguably "disconnected" as well, but a
     * probe cannot be defined here without a node - confirm whether it
     * needs separate handling.
     */
    if (nicp == NULL) {
      if (A.debug) {
        fma_log("NIC %d: nic_ptr is NULL, skipping disconnected port probes",
            n);
      }
      continue;
    }

    /* the port count comes from myri_info, as in the original loop */
    for (p=0; p<nip->myri_info.num_ports; ++p) {

      /* a NULL topo link means nothing is attached to this port */
      if (nicp->topo_ports[p] == NULL) {
        fma_define_empty_port_probe(LF_NODE(nicp), p);
      }
    }
  }
}
